// Title      : Study.java
// Author     : James Baird
// Created    : Thursday, 19th June 2003
// Description: Sample the corpus

import java.io.FileWriter;
import java.io.IOException;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;

/**
 * Draws a random sample from the corpus and writes every sampled item to its
 * own numbered text file.
 *
 * The corpus index is obtained from {@code Group.getIndex}; each sample is a
 * (group, post index) pair chosen at random, and no pair is used twice within
 * one run.
 */
public class Study
{
  private static final String corpusPathname = "../../Data/Corpus/CorpusXML.zip";

  // Currently not referenced anywhere in this class.
  private static final int minimumPostLength = 8;

  /**
   * Writes {@code sampleSize} randomly chosen samples to files named
   * {@code <outfilePathroot>-NNN.txt}, where NNN is the 1-based sample number
   * zero-padded to at least three digits.
   *
   * Progress is reported on standard output. An {@link IOException} aborts the
   * remaining samples and its stack trace is printed.
   *
   * NOTE: a pair is re-drawn until an unused one is found, so this method does
   * not terminate if {@code sampleSize} exceeds the number of distinct
   * (group, post index) pairs available in the corpus.
   *
   * @param sampleSize      number of samples (and output files) to produce
   * @param outfilePathroot path prefix shared by all output files
   */
  public static void sample(int sampleSize, String outfilePathroot)
  {
    System.out.println("Study two sample");
    System.out.println("");

    System.out.println("Corpus = " + corpusPathname);
    System.out.println("Sample size = " + sampleSize);
    System.out.println("Output file root = " + outfilePathroot);

    System.out.print("Building index");
    Vector groups = Group.getIndex(corpusPathname);
    System.out.println("");

    System.out.println("Group count = " + groups.size());
    System.out.println("");

    // Random.nextInt(0) would throw; report the real problem instead.
    if (sampleSize > 0 && groups.isEmpty())
    {
      System.err.println("No groups found in corpus; nothing to sample");
      return;
    }

    System.out.print("Accumulating");

    try
    {
      // Keys of the (group, post index) pairs that have already been sampled.
      Hashtable history = new Hashtable();
      Random random = new Random();

      for (int i = 0; i < sampleSize; i++)
      {
        Group group;
        int randomPost;
        String historyKey;

        do
        {
          // nextInt's bound is exclusive, so passing size() makes every
          // group, including the last one, eligible.
          group = (Group)groups.elementAt(random.nextInt(groups.size()));

          // Same reasoning: yields 0 .. getWindowSize() - 1, and plain 0
          // when the window size is 1.
          // NOTE(review): assumes every index below getWindowSize() is valid
          // for getPosts - confirm against Group.
          randomPost = random.nextInt(group.getWindowSize());

          // The separator keeps e.g. ("g1", 2) and ("g", 12) distinct.
          historyKey = group.getName() + "#" + randomPost;
        }
        while (history.containsKey(historyKey));

        history.put(historyKey, "x");

        Posts posts = group.getPosts(randomPost);

        System.out.print(".");

        // Open the file only once there is something to write, and always
        // release it, even if the write fails.
        FileWriter outfile =
          new FileWriter(outfilePathroot + "-" + padSampleNumber(i + 1) + ".txt");
        try
        {
          outfile.write(posts.toString() + System.getProperty("line.separator"));
        }
        finally
        {
          // close() flushes any buffered output first.
          outfile.close();
        }
      }
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }

    System.out.println("");
  }

  /** Left-pads a sample number with zeros to at least three digits. */
  private static String padSampleNumber(int number)
  {
    if (number < 10)
    {
      return "00" + number;
    }
    if (number < 100)
    {
      return "0" + number;
    }
    return "" + number;
  }

  /**
   * Command-line entry point.
   *
   * @param args {@code args[0]} is the sample size (parsed with
   *             {@code Integer.parseInt}); {@code args[1]} is the output
   *             file path root
   */
  public static void main(String[] args)
  {
    if (args.length < 2)
    {
      System.err.println("Usage: java Study <sampleSize> <outfilePathroot>");
      return;
    }

    sample(Integer.parseInt(args[0]), args[1]);
  }
}
